* match_in_index NEWNAME
*
* Looks up the price index for the current -datem- / -areacode- pair in the
* local price appreciation file and stores it under a name derived from NEWNAME.
*
* Expects in memory: variables -datem- and -areacode-.
* Argument:          NEWNAME, the name -datem- is renamed to afterwards.
* Result:            new variable index_NEWNAME (missing where no index row
*                    matched); -datem- renamed to NEWNAME, so the caller can
*                    generate a fresh -datem- for the next lookup.
*
* Dropping any earlier definition first lets the do-file be re-run in the same
* Stata session without "program match_in_index already defined".
capture program drop match_in_index
program match_in_index
	args newname

	// Fail before touching the data if the caller forgot the argument.
	if "`newname'" == "" {
		display as error "match_in_index: name for the matched date variable required"
		exit 198
	}

	// Keep all master rows (matched or not) and bring in only -index-.
	// NOTE(review): the original carried a trailing "regionname" note on the
	// key list - possibly an alternative key; confirm whether still relevant.
	merge m:1 datem areacode using ///
		"${datadir}\local_price_appreciation_2020", ///
		keep(1 3) nogen keepusing(index)

	ren index index_`newname'
	ren datem `newname'
end




* 1) Open repeat sales in Land Reg (matched with HTB) ----

* -use- accepts -clear- (discard the data in memory), not -replace-;
* with -replace- the command stops with "option replace not allowed".
use "${datadir}\landreg_2013-2020_matched_big", clear

* Within each unit, ordered by date then transaction id, record the year of
* the following transaction (missing for the unit's last transaction).
bysort unit_id (dated transaction_id): gen nextyear = year[_n+1]

* htb = 1 when the transaction carries a scheme id, 0 otherwise.
gen htb = !mi(schemeid)


* 2) Add in redemptions data [as in previous script] ----

* Attach the redemption record for each scheme id. Master rows are kept
* whether or not they match; merge_repaid == 3 identifies the matched ones.
merge m:1 schemeid ///
	using "${datadir}\HCA_redemptions_all", ///
	keepusing(Datepurchased HCACash OriginalHCAshare ///
		Transactiontype Salepriceagree Completion merge_htb_main) ///
	keep(master match) generate(merge_repaid)

* stair = 1 when the transaction description contains "staircasing"
* (case-sensitive match), 0 otherwise.
* NOTE(review): -Transaction- presumably abbreviates Transactiontype; this
* relies on variable abbreviation being on and unambiguous - confirm.
generate stair = regexm(Transaction, "staircasing")
			   
	

* 3) Only keep htb properties ----

* htbed is the unit-level maximum of htb, so it is 1 on every transaction of
* a unit that has at least one htb transaction. Units without one are dropped.
by unit_id (dated transaction_id), sort: egen htbed = max(htb)
drop if !htbed



* 4) Match the local authorities ----

* Postcode lookup: every master row is kept, lookup-only postcodes are
* discarded, and no _merge variable is created.
merge m:1 pcd ///
	using "${datadir}\la_region_postcode_lookup_2018", ///
	nogenerate keep(master match)

* match_in_index merges on -areacode-, so expose the local authority code
* under that name.
rename localauthoritycode areacode


* 5) Create the first date to match, and match it ----

// a. Create the date
* Keep the Land Registry month under its own name; -datem- becomes the
* lookup month consumed by match_in_index.
rename datem datem_lr

* For htb transactions only (missing otherwise), in order of precedence:
*   staircased -> month of the redemption completion date
*   sold       -> month of the unit's next transaction (redemption matched)
*   unsold     -> 24 months after the transaction (no redemption matched)
bysort unit_id (dated transaction_id): generate datem = ///
	cond(stair, mofd(Completion), ///
	cond(merge_repaid == 3, datem_lr[_n+1], datem_lr + 24)) ///
	if htb


// b. match it
match_in_index datem1



* 6-7) Create the second and third dates to match, and match them ----

* The second date is datem1 + 12 months and the third is datem1 + 24 months.
* Each pass creates -datem-, which match_in_index consumes and renames to
* datem2 / datem3 while adding index_datem2 / index_datem3.
forvalues k = 2/3 {
	generate datem = datem1 + 12 * (`k' - 1)
	match_in_index datem`k'
}

	



* 8) Restrict the sample to overlapping dates ----

// No sold properties
drop if merge_repaid == 3 & !stair

// no matching, truncation
// (missing datem1 sorts above every value, so those rows are dropped too)
drop if datem1 > ym(2017,9)

// matched sample
sort _all // for reproducibility
set seed 42
// Store the draw as double: a float keeps too few digits, so large samples
// produce tied values, and ties in the sort below are broken arbitrarily,
// which would make the matched sample differ from run to run.
gen double random = runiform()

// For each month, flag the first N rows (in random order) of each stair
// group, where N is the number of staircased rows in that month. All
// staircased rows are flagged, plus at most N randomly chosen others.
gen matched = 0
forvalues i = `=ym(2015, 5)' / `=ym(2017, 9)' {
	count if datem1 == `i' & stair
	// Save the count straight away so that no later command can overwrite r().
	local n_stair = r(N)
	bysort stair datem1 (random): replace matched = 1 if _n <= `n_stair' & datem1 == `i'
}

tab datem1 stair if matched




* 9) Compare index returns ----

* rtr2: percentage change in the index from datem1 to datem2 (12 months on).
* rtr3: annualised percentage change from datem1 to datem3 (24 months on);
*       the square root converts the two-year ratio to a per-year rate.
generate rtr2 = 100 * (index_datem2 / index_datem1 - 1)
generate rtr3 = 100 * ((index_datem3 / index_datem1)^0.5 - 1)

// Unmatched
foreach ret of varlist rtr2 rtr3 {
	ttest `ret', by(stair)
}

// Matched
foreach ret of varlist rtr2 rtr3 {
	ttest `ret' if matched, by(stair)
}